#!/usr/bin/python
# -*-coding:utf-8-*-
import random
import time
from _socket import timeout
from urllib.error import HTTPError
from urllib.request import urlopen
from urllib.parse import unquote, quote
from urllib.request import Request
from urllib import parse
from bs4 import BeautifulSoup as bs
import pymysql.cursors
import os.path
import sys

from urllib3.util import timeout


class sc_tender:
    """Plain record holding one tender (procurement) announcement scraped
    from the Sichuan government procurement site."""

    def __init__(self, tender_id=None, title=None, url=None, release_time=None, province_name="四川省", affiche_type=None):
        # Province is fixed to Sichuan by default; everything else is filled
        # in by the scraper after parsing a listing row.
        self.province_name = province_name
        # Identifier taken from the detail-page URL.
        self.tender_id = tender_id
        # Announcement title text from the listing page.
        self.title = title
        # Absolute URL of the detail page.
        self.url = url
        # Publication date, reformatted by the scraper.
        self.release_time = release_time
        # Announcement category (e.g. award notice); may stay None.
        self.affiche_type = affiche_type


def get_list_page(page_index, start_time, end_time=None):
    """Fetch one page of the Sichuan procurement tender list and upsert each
    entry into the ``tender_info`` MySQL table.

    :param page_index: 1-based page number; pages > 1 add a ``curPage`` query arg.
    :param start_time: search window start, e.g. ``'2020-07-01'``.
    :param end_time: search window end.  NOTE(review): the default ``None`` is
        interpolated literally as the string ``'None'`` into the URL — callers
        should always pass an explicit date.
    """
    # Open the database connection; closed in ``finally`` so repeated calls
    # (the caller loops over hundreds of pages) do not leak connections.
    connection = pymysql.connect(host="127.0.0.1", user="root", password="root", db="common", charset="utf8mb4")
    try:
        cursor = connection.cursor()
        url = "http://www.ccgp-sichuan.gov.cn/CmsNewsController.do?method=search&years=2017&chnlNames=\\u6240\\u6709" \
              "&chnlCodes=&title=&tenderno=&agentname=&buyername=&startTime=%s&endTime=%s&distin_like=510000&city=&town=" \
              "&cityText=\\u8BF7\\u9009\\u62E9&townText=\\u8BF7\\u9009\\u62E9&searchKey=&distin=&type=&beginDate=" \
              "&endDate=&str1=&str2=&pageSize=10%s&searchResultForm=search_result_anhui.ftl" \
              % (start_time, end_time, "" if page_index <= 1 else ("&curPage=%s" % page_index))
        # Fetch and parse the listing page; close the HTTP response promptly.
        with urlopen(url) as response:
            page = response.read().decode("utf-8")
        soup = bs(page, "html.parser")
        for child in soup.select("div.list-info > div.info > ul > li"):
            tender = sc_tender()
            tender.title = child.select_one("a > div.title").text.strip()
            if tender.title.endswith('成交公告'):
                tender.affiche_type = '成交公告'

            # Site shows "MM-DD YYYY"-style text; last 7 chars are the year
            # part, the rest the month/day — recombine as "year-monthday".
            tender.release_time = "%s-%s" % (
                child.select_one("div.time").text.strip()[-7:], child.select_one("div.time").text.strip()[0:-7])
            detail_url = child.select_one("a").get("href")
            if detail_url.startswith("http://202.61.88.152:9004"):
                # Source: Sichuan online competitive-bidding system — the id
                # lives in the query string.  parse_qs maps each key to a
                # *list*; the original stored the whole list (e.g. "['123']"),
                # so take element 0 to get the actual id string.
                params = parse.parse_qs(parse.urlparse(detail_url).query)
                tender.tender_id = params["projectId"][0]
                tender.url = detail_url
            elif detail_url.startswith(("http://202.61.88.152:9002",
                                        "http://202.61.88.152:8006",
                                        "http://202.61.88.152:8007",
                                        "http://221.182.121.29:9002",
                                        "http://118.122.215.11:8006",
                                        "http://125.64.223.15:7705",
                                        "http://www.scncggzy.com.cn")):
                # Absolute URL on a known mirror host: id is the file stem
                # (last path segment minus the 5-char ".html"-style suffix).
                tender.tender_id = detail_url[detail_url.rindex("/") + 1:-5]
                tender.url = detail_url
            else:
                # Relative link on the main portal — prepend the portal host.
                tender.tender_id = detail_url[detail_url.rindex("/") + 1:-5]
                tender.url = "http://www.ccgp-sichuan.gov.cn%s" % detail_url
            print("id=%s,date=%s, title= %s ,url= %s" % (tender.tender_id, tender.release_time, tender.title, tender.url))
            # REPLACE keeps the table idempotent when the same page is re-scraped.
            sql = "REPLACE INTO `tender_info`(`tender_id`,`title`,`release_time`,`url`,`province_name`) " \
                  "values(%s,%s,%s,%s,%s)"
            # Parameterized execution — values are escaped by the driver.
            cursor.execute(sql, (tender.tender_id, tender.title, tender.release_time, tender.url, tender.province_name))
            # Commit per row so a crash mid-page keeps earlier rows.
            connection.commit()
    finally:
        # Release DB resources even if fetching/parsing raised.
        connection.close()


if __name__ == '__main__':
    # As of 2020-01-19 the site listed 18634 pages in total.
    # (ref: https://developer.aliyun.com/topic/download?spm=a2c6h.12873639.0.0.4780461b8hHisz&id=37)
    start_time = '2020-07-01'
    end_time = '2020-07-28'
    # Scrape listing pages 1..299 for the configured date window.
    for page in range(1, 300):
        print("start page %s" % page)
        get_list_page(page, start_time, end_time)
